In [1]:
from eurostat_dataset import eurostat_dataset # packed in class the method to wrangle eurostat's table 
import pandas as pd
import sqlite3
import plotly.express as px
import plotly.offline as pyo
import warnings
warnings.filterwarnings("ignore")
# The result can be printed to html by nbconvert. For this:
#   - pip install nbconvert -U   in the environment
#   - jupyter nbconvert Eurostat.ipynb --no-input --to html --no-prompt   - in the terminal
# to activate env: venv\Scripts\activate.bat
# python -m pip freeze > requirements.txt
  1. Processing the Final energy consumption data
In [2]:
# uncheck this comment to download dataset to the sqlite database
# eurostat_dataset(code='NRG_BAL_S').WriteToDatabase()
# eurostat_dataset(code='NRG_INF_EPCRW').WriteToDatabase()
# write to database, so you don't need to wait everytime when you restart the kernel or the file
In [3]:
#check the tables that has been written to the database:
def db_tables():
    con = sqlite3.connect('ngr.db')
    cursor = con.cursor()
    cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
    print(cursor.fetchall())
    con.close
In [4]:
# read table from db to DataFrame
def read_df_from_db(table):
    con = sqlite3.connect('ngr.db')
    cursor = con.cursor()   
    df = pd.read_sql_query("SELECT * FROM "+ table , con)
    con.close
    df.drop(columns=['index'],inplace=True)

    return df
In [5]:
tables = db_tables()
print(f'The list of tables in the local sqlite database {tables}') # we can see the tables in the database 'ngr.db'
[('nrg_bal_s',), ('nrg_inf_epcrw',)]
The list of tables in the local sqlite database None
In [6]:
renew = eurostat_dataset(code='nrg_inf_epcrw').DatasetInfo()
dataset_info=eurostat_dataset(code='NRG_BAL_S').DatasetInfo()
In [7]:
print(f'The first dataset: "{dataset_info[0]}" dates from {dataset_info[1]} to {dataset_info[2]}.')
print(f'The second dataset: "{renew[0]}" dates from {renew[1]} to {renew[2]}.')
The first dataset: "Simplified energy balances" dates from 1990 to 2020.
The second dataset: "Electricity production capacities for renewables and wastes" dates from 1990 to 2020.
In [8]:
#reading the table of the first dataset from local database
bal = read_df_from_db('nrg_bal_s')
balance=bal.copy()
balance = balance.query('unit=="Gigawatt-hour" and siec=="Total" and nrg_bal=="Final consumption - energy use" \
and geo not in ("Euro area - 19 countries  (from 2015)","European Union - 27 countries (from 2020)")')
get_energy = read_df_from_db('nrg_inf_epcrw')

Final consumption - energy use (Gigawatt-hour)

In [9]:
balance.head()
Out[9]:
nrg_bal siec unit geo date value
5778 Final consumption - energy use Total Gigawatt-hour Albania 2020 21474.051
5779 Final consumption - energy use Total Gigawatt-hour Austria 2020 288624.871
5780 Final consumption - energy use Total Gigawatt-hour Bosnia and Herzegovina 2020 46804.808
5781 Final consumption - energy use Total Gigawatt-hour Belgium 2020 360985.528
5782 Final consumption - energy use Total Gigawatt-hour Bulgaria 2020 110633.154
In [10]:
balance.value=pd.to_numeric(balance.value)
balance.date=pd.to_numeric(balance.date)
In [11]:
#I like plotly. It is more interactive)
# https://plotly.com/python/plotly-express/#gallery
pyo.init_notebook_mode() #trying to render the plot in github, but it is warking only in nbviewer!https://nbviewer.org/
fig = px.line(balance, x="date", y="value", color="geo",title="Final consumption - energy use,Gigawatt-hour")

pyo.iplot(fig)
In [12]:
#get population dataset and clean
pop = eurostat_dataset('TPS00001').GetDf()
population=pop.copy()
population['value'] = population.value.astype('str').fillna('').str.extract(r"(\d+)|(\d+)\.")[0].astype('Int32')
population['date'] = pd.to_numeric(population.date)
In [13]:
#merge with balance tables
m_balance = balance.merge(population[["geo","date","value"]],on=['geo',"date"],suffixes=("", "_population"))
m_balance = m_balance.assign(per_capita=lambda x: x.value*1000/x.value_population)
In [14]:
pyo.init_notebook_mode()
fig = px.line(m_balance, x="date", y="per_capita", color="geo",
                title="Final consumption - energy use per capita,Megawatt-hour")

pyo.iplot(fig)
In [15]:
get_energy.head()
Out[15]:
siec plant_tec unit geo date value
0 Solid biofuels Net maximum electrical capacity Megawatt Albania 2020 0.000
1 Solid biofuels Net maximum electrical capacity Megawatt Austria 2020 816.199
2 Solid biofuels Net maximum electrical capacity Megawatt Bosnia and Herzegovina 2020 1.120
3 Solid biofuels Net maximum electrical capacity Megawatt Belgium 2020 563.500
4 Solid biofuels Net maximum electrical capacity Megawatt Bulgaria 2020 15.064
In [16]:
"Gross electricity production"
el_production = bal.query('unit=="Gigawatt-hour" and siec=="Total" and nrg_bal=="Gross electricity production" \
and geo not in ("Euro area - 19 countries  (from 2015)","European Union - 27 countries (from 2020)")')
el_production.date=pd.to_numeric(el_production.date)
  1. Energy Consumption, electricity production and electricity production from renewable
As for Latvia in 2020:¶
- Energy consumption: 44172.893 Gigawatt-hour¶
- electricity production - 5724.846 Gigawatt-hour. 13% from Energy consumption¶
- electricity production from renewable - 3649.529 Gigawatt-hour. 63% from electricity production¶
- 87% electricity production from the Hydro¶
In [17]:
final_cons = balance[['geo','date','value']].rename(columns={'value':'Final consumption'}).set_index(['geo','date']).squeeze()
In [18]:
el_prod = el_production[['geo','date','value']].rename(columns={'value':'Electricity production'}).set_index(['geo','date']).squeeze()
In [19]:
renewables = bal.query('unit=="Gigawatt-hour" and siec=="Renewables and biofuels" and nrg_bal=="Gross electricity production" \
and geo not in ("Euro area - 19 countries  (from 2015)","European Union - 27 countries (from 2020)")')
renewables.date=pd.to_numeric(renewables.date)
renSeries = renewables[['geo','date','value']].rename(columns={'value':'Electricity production from Renewables and biofuels'}).set_index(['geo','date']).squeeze()
In [45]:
ren_table = get_energy.query('siec in ("Solid biofuels","Biogases","Hydro","Geothermal","Wind","Solar") \
    and geo not in ("Euro area - 19 countries  (from 2015)","European Union - 27 countries (from 2020)")')
ren_table.date=pd.to_numeric(ren_table.date)
ren_table['perc'] = ren_table['value'] / ren_table.groupby(['geo','date'])['value'].transform('sum')
ren_table['max_perc'] = ren_table.groupby(['geo','date'])['perc'].transform(max)

reneable_source_name = ren_table.query('perc==max_perc')[['geo','date','siec']].drop_duplicates(subset=['geo','date'])\
                        .rename(columns={'siec':'Main Renewable Source'}).set_index(['geo','date']).squeeze()

reneable_source_perc = ren_table.query('perc==max_perc')[['geo','date','perc']].drop_duplicates(subset=['geo','date'])\
                        .rename(columns={'perc':'Main Renewable Source,perc'}).set_index(['geo','date']).squeeze()
concated = pd.concat([final_cons,el_prod,renSeries,reneable_source_name,reneable_source_perc],axis=1).reset_index()
concated['Electricity production'] = pd.to_numeric(concated['Electricity production'])
concated['Electricity production from Renewables and biofuels'] = pd.to_numeric(concated['Electricity production from Renewables and biofuels'])
concated["Electricity production from Final consumption,perc"] = concated['Electricity production']/concated['Final consumption']
concated["Electricity production from Renewables and biofuels,perc"] = concated['Electricity production from Renewables and biofuels']/concated['Electricity production']
In [46]:
cols = concated.columns.to_list()
new_cols = cols[:5]+cols[7:9]+cols[5:7]
In [48]:
concated = concated[new_cols]
In [49]:
concated
Out[49]:
geo date Final consumption Electricity production Electricity production from Renewables and biofuels Electricity production from Final consumption,perc Electricity production from Renewables and biofuels,perc Main Renewable Source Main Renewable Source,perc
0 Albania 2020 21474.051 5313.166 5313.166 0.247423 1.000000 Hydro 0.991279
1 Austria 2020 288624.871 72556.246 58779.511 0.251386 0.810123 Hydro 0.700284
2 Bosnia and Herzegovina 2020 46804.808 16874.000 4983.000 0.360519 0.295306 Hydro 0.947741
3 Belgium 2020 360985.528 88890.800 24458.500 0.246245 0.275152 Solar 0.448351
4 Bulgaria 2020 110633.154 40731.058 7977.586 0.368163 0.195860 Hydro 0.646253
... ... ... ... ... ... ... ... ... ...
1235 Slovakia 1990 161007.115 26132.000 2515.000 0.162303 0.096242 NaN NaN
1236 Türkiye 1990 438615.366 57543.000 23228.000 0.131192 0.403663 Hydro 0.997346
1237 Ukraine 1990 1671235.901 298835.000 10723.000 0.178811 0.035883 NaN NaN
1238 United Kingdom 1990 1474621.763 319737.000 7793.000 0.216826 0.024373 Hydro 0.974981
1239 Kosovo (under United Nations Security Council ... 1990 NaN NaN NaN NaN NaN NaN NaN

1240 rows × 9 columns

In [53]:
pyo.init_notebook_mode()
fig = px.line(concated, x="date", y="Electricity production from Renewables and biofuels,perc", color="geo",
                title="Electricity production from Renewables and biofuels,perc")
pyo.iplot(fig)
In [55]:
pyo.init_notebook_mode()
fig = px.line(concated, x="date", y="Electricity production from Renewables and biofuels,perc", color="geo",
                title="Electricity production from Renewables and biofuels,perc")
pyo.iplot(fig)